# Load the required libraries

suppressPackageStartupMessages(library("dplyr"))
suppressPackageStartupMessages(library("lubridate"))
suppressPackageStartupMessages(library("plotly"))
suppressPackageStartupMessages(library("ggplot2"))
library(lubridate)
library(plotly)
library(dplyr)
library(ggplot2)
# Read the "COVID-19 Vaccine Doses Given to San Franciscans by Demographics
# Over Time" CSV from GitHub into `vaccine`, then preview its first rows.
vaccine <- read.csv(
  file = "https://github.com/ayeayeronn/Covid-19-Vaccination/blob/main/ARCHIVED__COVID-19_Vaccine_Doses_Given_to_San_Franciscans_by_Demographics_Over_Time.csv?raw=true",
  sep = ","
)
head(vaccine, n = 6L)
##   DATE_ADMINISTERED ADMINISTERING_PROVIDER_TYPE NEW_1ST_DOSES NEW_2ND_DOSES
## 1        2021/10/17               All Providers            15            17
## 2        2021/10/18               All Providers            24            40
## 3        2021/10/19               All Providers            39            30
## 4        2021/10/20               All Providers            28            41
## 5        2021/10/21               All Providers            31            40
## 6        2021/10/22               All Providers            27            40
##   NEW_SINGLE_DOSES NEW_SERIES_COMPLETED NEW_RECIPIENTS CUMULATIVE_1ST_DOSES
## 1                0                   17             15                52246
## 2                5                   45             29                52270
## 3                1                   31             40                52309
## 4                3                   44             31                52337
## 5                8                   48             40                52368
## 6                5                   45             32                52395
##   CUMULATIVE_2ND_DOSES CUMULATIVE_SINGLE_DOSES CUMULATIVE_SERIES_COMPLETED
## 1                47994                    4478                       52472
## 2                48034                    4483                       52517
## 3                48064                    4484                       52548
## 4                48105                    4487                       52592
## 5                48145                    4495                       52640
## 6                48185                    4500                       52685
##   CUMULATIVE_RECIPIENTS                                        OVERALL_SEGMENT
## 1                 56766 Ages 12+ by Age Bracket, Administered by All Providers
## 2                 56795 Ages 12+ by Age Bracket, Administered by All Providers
## 3                 56835 Ages 12+ by Age Bracket, Administered by All Providers
## 4                 56866 Ages 12+ by Age Bracket, Administered by All Providers
## 5                 56906 Ages 12+ by Age Bracket, Administered by All Providers
## 6                 56938 Ages 12+ by Age Bracket, Administered by All Providers
##   AGE_GROUP DEMOGRAPHIC_GROUP DEMOGRAPHIC_SUBGROUP SUBGROUP_POPULATION
## 1       12+       Age Bracket                18-24               62127
## 2       12+       Age Bracket                18-24               62127
## 3       12+       Age Bracket                18-24               62127
## 4       12+       Age Bracket                18-24               62127
## 5       12+       Age Bracket                18-24               62127
## 6       12+       Age Bracket                18-24               62127
##   AGE_GROUP_POPULATION DEMOGRAPHIC_SUBGROUP_SORT_ORDER NEW_BOOSTER_DOSES
## 1               791131                               4                15
## 2               791131                               4                41
## 3               791131                               4                48
## 4               791131                               4                29
## 5               791131                               4                45
## 6               791131                               4                71
##   NEW_BOOSTER_RECIPIENTS CUMULATIVE_BOOSTER_DOSES CUMULATIVE_BOOSTER_RECIPIENTS
## 1                     15                      880                           871
## 2                     41                      921                           912
## 3                     48                      969                           960
## 4                     29                      998                           989
## 5                     45                     1043                          1034
## 6                     71                     1114                          1105
##   NEW_2ND_BOOSTER_RECIPIENTS CUMULATIVE_2ND_BOOSTER_RECIPIENTS
## 1                          0                                 4
## 2                          0                                 4
## 3                          0                                 4
## 4                          0                                 4
## 5                          0                                 4
## 6                          0                                 4
##               DATA_AS_OF        DATA_UPDATED_AT         DATA_LOADED_AT
## 1 10/27/2022 06:00:10 AM 10/27/2022 08:10:34 AM 10/27/2022 10:15:08 AM
## 2 10/27/2022 06:00:10 AM 10/27/2022 08:10:34 AM 10/27/2022 10:15:08 AM
## 3 10/27/2022 06:00:10 AM 10/27/2022 08:10:34 AM 10/27/2022 10:15:08 AM
## 4 10/27/2022 06:00:10 AM 10/27/2022 08:10:34 AM 10/27/2022 10:15:08 AM
## 5 10/27/2022 06:00:10 AM 10/27/2022 08:10:34 AM 10/27/2022 10:15:08 AM
## 6 10/27/2022 06:00:10 AM 10/27/2022 08:10:34 AM 10/27/2022 10:15:08 AM
# Report the table's dimensions (rows, columns).
dim(vaccine)
## [1] 93576    28
# Parse DATE_ADMINISTERED from year/month/day text into Date values so it can
# be compared against calendar dates, then confirm the resulting class.
vaccine$DATE_ADMINISTERED <- ymd(vaccine$DATE_ADMINISTERED)
class(vaccine$DATE_ADMINISTERED)
## [1] "Date"

2)

Number of first doses before June 15

# Total of NEW_1ST_DOSES over rows dated strictly before 15 June 2021.
filter(vaccine, DATE_ADMINISTERED < as.Date("2021-06-15")) %>%
  summarise(num_new_doses_before_june15 = sum(NEW_1ST_DOSES))
##   num_new_doses_before_june15
## 1                     5188428

Number of first doses after June 15

# Total of NEW_1ST_DOSES over rows dated strictly after 15 June 2021.
# Rows dated exactly 15 June are excluded by the strict `>`.
filter(vaccine, DATE_ADMINISTERED > as.Date("2021-06-15")) %>%
  summarise(num_new_doses_after_june15 = sum(NEW_1ST_DOSES))
##   num_new_doses_after_june15
## 1                    1119615

Number of first doses as of September 12

# Total of NEW_1ST_DOSES up to and including 12 September 2021.
filter(vaccine, DATE_ADMINISTERED <= as.Date("2021-09-12")) %>%
  summarise(num_new_doses_of_sep12 = sum(NEW_1ST_DOSES))
##   num_new_doses_of_sep12
## 1                5555787

3)

Number of single vs first vs second doses before June 15th

# Single, first and second dose totals over rows dated strictly before
# 15 June 2021, returned side by side in one row.
filter(vaccine, DATE_ADMINISTERED < as.Date("2021-06-15")) %>%
  summarise(
    total_single_dose_before_June15 = sum(NEW_SINGLE_DOSES),
    num_new_doses_before_june15 = sum(NEW_1ST_DOSES),
    num_2nd_doses_before_June15 = sum(NEW_2ND_DOSES)
  )
##   total_single_dose_before_June15 num_new_doses_before_june15
## 1                          372574                     5188428
##   num_2nd_doses_before_June15
## 1                     4822264

Number of single vs first vs second doses after June 15th

# Single, first and second dose totals over rows dated strictly after
# 15 June 2021, returned side by side in one row.
filter(vaccine, DATE_ADMINISTERED > as.Date("2021-06-15")) %>%
  summarise(
    total_single_dose_after_June15 = sum(NEW_SINGLE_DOSES),
    num_new_doses_after_june15 = sum(NEW_1ST_DOSES),
    num_2nd_doses_after_june15 = sum(NEW_2ND_DOSES)
  )
##   total_single_dose_after_June15 num_new_doses_after_june15
## 1                          91336                    1119615
##   num_2nd_doses_after_june15
## 1                    1007085

Number of single vs first vs second doses as of September 12

# Single, first and second dose totals as of 12 September 2021.
# "As of" includes the 12th itself, so the comparison is inclusive (`<=`),
# matching the first-dose-only "as of September 12" query earlier in this
# document (the strict `<` used previously left out doses given on the 12th).
# NOTE(review): the printed output that follows this chunk was produced with
# the strict `<` and must be regenerated by re-knitting the document.
vaccine %>%
  filter(DATE_ADMINISTERED <= as.Date("2021-09-12")) %>%
  summarise(
    num_single_dose_of_Sep12 = sum(NEW_SINGLE_DOSES),
    num_new_doses_Of_Sep122021 = sum(NEW_1ST_DOSES),
    num_2nd_doses_of_Sep12 = sum(NEW_2ND_DOSES)
  )
##   num_single_dose_of_Sep12 num_new_doses_Of_Sep122021 num_2nd_doses_of_Sep12
## 1                   430311                    5554580                5229592

4)

# Keep only rows administered before 18 September 2021 and store each row's
# month number in a new MONTH column. This overwrites `vaccine`, so every
# later chunk works on the truncated data.
vaccine <- filter(vaccine, DATE_ADMINISTERED < as.Date("2021-09-18"))
vaccine$MONTH <- month(vaccine$DATE_ADMINISTERED)

December and January have a low number of vaccinations because only healthcare and essential workers had access to the vaccine. March and April have the highest number of vaccinations, followed by May and February, because vaccine distribution was rolled out on a tier system. As the months go on, the monthly totals fall, which suggests that the share of the population getting vaccinated is reaching a plateau. Note that September only includes doses administered before September 18, so its total covers a partial month.

# Doses (first + second + single) per calendar month, largest total first.
# MONTH is turned from a month number into an abbreviated month label as part
# of the grouping step.
vaccine %>%
  group_by(MONTH = month(MONTH, label = TRUE)) %>%
  summarise(
    num_of_vaccinations =
      sum(NEW_1ST_DOSES) + sum(NEW_2ND_DOSES) + sum(NEW_SINGLE_DOSES)
  ) %>%
  arrange(desc(num_of_vaccinations))
## # A tibble: 10 × 2
##    MONTH num_of_vaccinations
##    <ord>               <int>
##  1 Apr               2960163
##  2 Mar               2857746
##  3 May               1676516
##  4 Feb               1671916
##  5 Jan                749954
##  6 Jun                558723
##  7 Aug                276478
##  8 Jul                266207
##  9 Sep                125839
## 10 Dec                114068

5)

# List the distinct subgroups recorded under the "Race/Ethnicity" demographic
# group.
vaccine %>%
  filter(DEMOGRAPHIC_GROUP == "Race/Ethnicity") %>%
  select(DEMOGRAPHIC_GROUP, DEMOGRAPHIC_SUBGROUP) %>%
  unique()
##      DEMOGRAPHIC_GROUP                      DEMOGRAPHIC_SUBGROUP
## 1       Race/Ethnicity                                     Asian
## 278     Race/Ethnicity                 Black or African American
## 554     Race/Ethnicity Native Hawaiian or Other Pacific Islander
## 790     Race/Ethnicity           Hispanic or Latino/a, all races
## 1103    Race/Ethnicity                                Other Race
## 1170    Race/Ethnicity          American Indian or Alaska Native
## 1588    Race/Ethnicity                                   Unknown
## 1864    Race/Ethnicity                              Multi-Racial
## 2642    Race/Ethnicity                                     White
# Add a RACE column: the race/ethnicity subgroup for rows that belong to the
# "Race/Ethnicity" demographic group, and "N/A" for every other row.
#
# The test is on DEMOGRAPHIC_GROUP rather than on a hand-typed list of
# subgroup labels. The listing printed just above shows that the
# "Race/Ethnicity" group contains exactly the nine labels the old list
# spelled out, so race rows are labelled as before. A generic label such as
# "Unknown" can no longer pull in rows from a different demographic group
# (NOTE(review): confirm whether other groups actually reuse that label), and
# the code no longer needs editing when a subgroup is added or renamed.
# `%in%` is used instead of `==` so a missing DEMOGRAPHIC_GROUP yields "N/A"
# rather than NA.
vaccine <- vaccine %>%
  mutate(
    RACE = ifelse(
      DEMOGRAPHIC_GROUP %in% "Race/Ethnicity",
      DEMOGRAPHIC_SUBGROUP,
      "N/A"
    )
  )
# Daily first-dose totals per race/ethnicity subgroup, drawn as one coloured
# line per subgroup with the legend placed inside the plotting area.
# `.groups = "drop"` makes summarise() return an ungrouped result explicitly,
# which also silences the "summarise() has grouped output by 'RACE'" message
# this chunk used to print.
vaccine %>%
  filter(RACE != "N/A") %>%
  group_by(RACE, DATE_ADMINISTERED) %>%
  summarise(dose = sum(NEW_1ST_DOSES), .groups = "drop") %>%
  ggplot(aes(x = DATE_ADMINISTERED, y = dose, color = RACE)) +
  geom_line() +
  theme(legend.position = c(0.65, 0.6)) +
  labs(
    title = "Number of Covid Vaccines by Race",
    x = "Date Administered",
    y = "Total number of Doses"
  )

# Render the most recently drawn ggplot as an interactive plotly figure.
# The size is passed to ggplotly() itself: setting width/height through
# layout() is deprecated and produced a warning on every run.
ggplotly(width = 1700, height = 600)

6)

suppressPackageStartupMessages(library("reshape"))
library(reshape)
# Keep just the administration date and the three per-dose-type count columns,
# then show the structure of the result.
# The former `combined[] <- lapply(combined, unlist)` step was removed: the
# selected columns are already plain Date/integer vectors (see the str()
# output), so unlisting them changed nothing.
combined <- vaccine %>%
  select(DATE_ADMINISTERED, NEW_1ST_DOSES, NEW_2ND_DOSES, NEW_SINGLE_DOSES)
str(combined)
## 'data.frame':    36683 obs. of  4 variables:
##  $ DATE_ADMINISTERED: Date, format: "2021-03-16" "2021-03-17" ...
##  $ NEW_1ST_DOSES    : int  64 66 32 34 49 26 51 52 60 59 ...
##  $ NEW_2ND_DOSES    : int  3 3 8 10 10 6 11 11 9 24 ...
##  $ NEW_SINGLE_DOSES : int  0 0 0 0 0 0 0 1 0 0 ...
# Reshape to long form: DATE_ADMINISTERED is kept as the identifier and the
# three dose-count columns are stacked, with the source column name in
# `variable` and its count in `value`.
combined <- melt(combined, id.vars = "DATE_ADMINISTERED")

# Sum the counts per day and dose type and draw one line per dose type.
# `.groups = "drop"` returns an ungrouped result explicitly and silences the
# "summarise() has grouped output by 'DATE_ADMINISTERED'" message this chunk
# used to print.
combined %>%
  group_by(DATE_ADMINISTERED, variable) %>%
  summarise(num_dose = sum(value), .groups = "drop") %>%
  ggplot(aes(x = DATE_ADMINISTERED, y = num_dose, color = variable)) +
  geom_line() +
  labs(
    title = "Number of Covid Vaccines by Dose",
    x = "Date Administered",
    y = "Total Number of Doses"
  ) +
  scale_color_discrete(
    name = "Type of Dose",
    labels = c("New 1st Doses", "New 2nd Doses", "New Single Doses")
  )

# Render the most recently drawn ggplot as an interactive plotly figure.
# The size is passed to ggplotly() itself: setting width/height through
# layout() is deprecated and produced a warning on every run.
ggplotly(width = 1700, height = 600)